Calculate climate trends

Author

Max Lindmark

Published

September 9, 2023

Intro

This scripts calculates trends in biomass and climate variables from climate-agnostic sdm-predictions (“04-fit-sdms-random.qmd”)

Load packages & source functions

# Load libraries, install if needed
pkgs <- c("tidyverse", "tidylog", "RCurl", "devtools", "patchwork",
          "viridis", "RColorBrewer", "here", "raster", "concaveman") 

if(length(setdiff(pkgs,rownames(installed.packages()))) > 0){

    install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = T)
  
  }

invisible(lapply(pkgs, library, character.only = T))

# Set path
home <- here::here()

# Source code for map plots
# For some reason I can't source it directly from github...
# You need: # devtools::install_github("seananderson/ggsidekick") # not on CRAN; library(ggsidekick)
# devtools::source_url("https://raw.githubusercontent.com/maxlindmark/pred-prey-overlap/main/R/functions/map-plot.R")
source(paste0(home, "/R/functions/map-plot.R"))
options(ggplot2.continuous.colour = "viridis")

Read data

# Read prediction grids
d <- read_csv(paste0(home, "/output/data_pred_grids_04_random_sdms.csv")) |>
  separate(group, c("species", "life_stage"), sep = "_", remove = FALSE)
Rows: 5660568 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (6): substrate, month_year, ices_rect, group, species, life_stage
dbl (36): X, Y, year, lon, lat, depth, quarter, month, oxy, temp, sal, sub_d...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Trim the prediction grid by species cumulative biomass

# Cumulative filter
# d_test <- data.frame(y = round(rnorm(10, 10, 2))) |> 
#   arrange(desc(y)) |>
#   mutate(cumsum = cumsum(y),
#          sum = sum(y),
#          perc = cumsum / sum)
# 
# d_test
# 
# d_test |> filter(perc < 0.95)

d_grid_keep <- d |>
  mutate(group = paste(group, quarter, sep = "_")) |> 
  group_by(X, Y, group) |> 
  # summarise across all years, but within group and quarter
  summarise(grid_mean = mean(est)) |>
  ungroup() |> 
  group_by(group) |>
  arrange(desc(grid_mean)) |> 
  mutate(cumsum = cumsum(grid_mean),
         sum = sum(grid_mean),
         perc = cumsum / sum) |> 
  ungroup() |> 
  filter(perc < 0.95) |> 
  mutate(id = paste(X, Y, group, sep = "_"))
mutate: changed 5,660,568 values (100%) of 'group' (0 new NA)
group_by: 3 grouping variables (X, Y, group)
summarise: now 195,192 rows and 4 columns, 2 group variables remaining (X, Y)
ungroup: no grouping variables
group_by: one grouping variable (group)
mutate (grouped): new variable 'cumsum' (double) with 195,192 unique values and 0% NA
                  new variable 'sum' (double) with 12 unique values and 0% NA
                  new variable 'perc' (double) with 195,181 unique values and 0% NA
ungroup: no grouping variables
filter: removed 21,973 rows (11%), 173,219 rows remaining
mutate: new variable 'id' (character) with 173,219 unique values and 0% NA
# Filter the main dataset to include these grid cells only
pred_grid_trim <- d |> 
  mutate(group = paste(group, quarter, sep = "_")) |> 
  mutate(id = paste(X, Y, group, sep = "_")) |> 
  filter(id %in% unique(d_grid_keep$id))
mutate: changed 5,660,568 values (100%) of 'group' (0 new NA)
mutate: new variable 'id' (character) with 195,192 unique values and 0% NA
filter: removed 637,217 rows (11%), 5,023,351 rows remaining
# Filter id's that are in the trends data frame before summarizing... because that is also trimmed by trends-quantiles. 
# So first trim by quantiles

Continue calculating average cell values

# filter the area-occupied-trimmed grid by the quantile-trimmed trends data frame
pred_grid_trim <- pred_grid_trim |> 
  filter(id %in% unique(trends$id))
filter: removed 156,513 rows (3%), 4,866,838 rows remaining
# ... Now that we have the final set of grid cells by species (after trimming by core area occupied and slope-quantiles), summarize grid cell average oxygen, temperature, and biomass
pred_grid_sum <- pred_grid_trim |> 
  group_by(id) |> 
  summarise(mean_log_biomass = mean(est),
            mean_temp = mean(temp),
            mean_oxy = mean(oxy),
            mean_phi = mean(phi)) |> 
  # Center the average across year variables
  mutate(mean_log_biomass_ct = mean_log_biomass - mean(mean_log_biomass),
         mean_temp_ct = mean_temp - mean(mean_temp),
         mean_oxy_ct = mean_oxy - mean(mean_oxy),
         mean_phi_ct = mean_phi - mean(mean_phi)) |>
  ungroup() |> 
  dplyr::select(id, mean_log_biomass, mean_temp, mean_oxy, mean_phi, 
                mean_log_biomass_ct, mean_temp_ct, mean_oxy_ct, mean_phi_ct)
group_by: one grouping variable (id)
summarise: now 167,822 rows and 5 columns, ungrouped
mutate: new variable 'mean_log_biomass_ct' (double) with 167,822 unique values and 0% NA
        new variable 'mean_temp_ct' (double) with 31,997 unique values and 0% NA
        new variable 'mean_oxy_ct' (double) with 31,996 unique values and 0% NA
        new variable 'mean_phi_ct' (double) with 167,822 unique values and 0% NA
ungroup: no grouping variables
# Join in the average variables to the trends data
trends <- trends |> 
  mutate(id = paste(X, Y, species, life_stage, quarter, sep = "_")) |> 
  left_join(pred_grid_sum, by = "id")
mutate: no changes
left_join: added 8 columns (mean_log_biomass, mean_temp, mean_oxy, mean_phi, mean_log_biomass_ct, …)
           > rows only in x         0
           > rows only in y  (      0)
           > matched rows     167,822
           >                 =========
           > rows total       167,822
# Scale variables: which group?
trends <- trends |>
  mutate(group = paste(species, life_stage, sep = "_")) |>
  group_by(group) |>
  mutate(temp_slope_sc = (temp_slope - mean(temp_slope)) / sd(temp_slope),
         oxy_slope_sc = (oxy_slope - mean(oxy_slope)) / sd(oxy_slope),
         phi_slope_sc = (phi_slope - mean(phi_slope)) / sd(phi_slope)) |>
  ungroup() |>
  mutate(quarter_f = as.factor(quarter))
mutate: new variable 'group' (character) with 6 unique values and 0% NA
group_by: one grouping variable (group)
mutate (grouped): new variable 'temp_slope_sc' (double) with 167,822 unique values and 0% NA
                  new variable 'oxy_slope_sc' (double) with 167,822 unique values and 0% NA
                  new variable 'phi_slope_sc' (double) with 167,822 unique values and 0% NA
ungroup: no grouping variables
mutate: new variable 'quarter_f' (factor) with 2 unique values and 0% NA
# Fix names
trends <- trends |>
  mutate(species = str_to_title(species),
         life_stage = str_to_title(life_stage)) |>
  mutate(group = paste(species, life_stage, sep = "_"))
mutate: changed 167,822 values (100%) of 'species' (0 new NA)
        changed 167,822 values (100%) of 'life_stage' (0 new NA)
mutate: changed 167,822 values (100%) of 'group' (0 new NA)

Save data

# Save data
write_csv(trends, paste0(home, "/data/clean/trends.csv"))